reference SCLC multiome analysis.Rmd for in depth description of sclc RDS.
sclc <- readRDS('/Users/smgroves/Box/multiome_data/M1/outs/TKO.rds')
sclc
## An object of class Seurat
## 467092 features across 8483 samples within 6 assays
## Active assay: ATAC (195588 features, 195588 variable features)
## 5 other assays present: RNA, chromvar, SCT, peaks, gene_activity
## 3 dimensional reductions calculated: lsi, umap, pca
# x <- sclc@assays$SCT@scale.data
x_pca <- read.csv('../data/pca-embedding.csv',header = TRUE,row.names = 1)
x_pca <- t(x_pca)
x_pca <- x_pca[1:20,] #keep only top 18 PCs
loadings<- read.csv('../data/pca-feature-loadings-projected.csv', header = TRUE, row.names = 1)
loadings <- as.matrix(loadings)
##################################
load("../data/arc.Robj", verbose=TRUE)
## Loading objects:
## arc
load("../data/arc_ks.Robj", verbose=TRUE)
## Loading objects:
## arc_ks
# Fit 5 archetypes
# arc <- fit_pch((x_pca), noc = 5)
# # Fit 5 archetypes with bootstrapping for robustness
# arc_rob = fit_pch_bootstrap(x_pca, n = 200, sample_prop = .8, seed = 2543, delta = 1,
# noc = 5)
# arc_ave <- average_pch_fits(arc_rob)
#
#
#
# arc_ks = ParetoTI::k_fit_pch(x_pca, ks = 2:8, check_installed = T,
# bootstrap = T, bootstrap_N = 200, maxiter = 1000,
# bootstrap_type = "m", seed = 2543,
# volume_ratio = "t_ratio", # set to "none" if too slow
# delta=0, conv_crit = 1e-04, order_type = "align",
# sample_prop = 0.75)
#
# # Show variance explained by a polytope with each k (cumulative)
ParetoTI::plot_arc_var(arc_ks, type = "varexpl", point_size = 2, line_size = 1.5) + theme_bw()
#
#
# save(arc, file="../data/arc.Robj")
# save(arc_ks, file="../data/arc_ks.Robj")
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:ensembldb':
##
## filter, select
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
It looks like ciliated cells are dominating PC2, so we are going to remove those cells and just look at the four archetypes defining the other cell types.
sclc_small <- subset(x = sclc, subset = cluster != "Cilliated cells")
# write.csv(sclc@assays$SCT@scale.data, '../data/scaled-SCT-data.csv')
# write.csv(sclc@reductions$pca@cell.embeddings, '../data/pca-embedding.csv')
# write.csv(sclc@reductions$pca@feature.loadings, '../data/pca-feature-loadings.csv')
#
#
# write.csv(sclc@reductions$pca@feature.loadings.projected, '../data/pca-feature-loadings-projected.csv')
DefaultAssay(sclc_small) <- "RNA"
# store mitochondrial percentage in object meta data
sclc_small <- PercentageFeatureSet(sclc_small, pattern = "mt-", col.name = "percent.mt")
# run sctransform
# BiocManager::install("glmGamPoi")
library(glmGamPoi)
sclc_small <- SCTransform(sclc_small, vars.to.regress = "percent.mt", verbose = FALSE,method = "glmGamPoi")
# These are now standard steps in the Seurat workflow for visualization and clustering
sclc_small <- Seurat::ScaleData(object = sclc_small)
sclc_small <- Seurat::RunPCA(object = sclc_small)
sclc_small <- Seurat::ProjectDim(sclc_small, reduction = 'pca')
## PC_ 1
## Positive: Mecom, Scgb1a1, Col23a1, Rad51b, Atp11a, Ptpn13, Samd4, Prdm16, Runx1, Nckap5
## Rai14, Eya2, Lrmda, Por, Sfta3-ps, Ahnak, Il18r1, Slc4a5, Nfia, Myo5c
## Negative: Ptprt, Meis2, Nrxn1, Piezo2, Cdh13, Cacna1a, Nlgn1, Celf4, Slc8a1, Ctnna2
## mt-Nd1, Tcerg1l, Ptprn2, Zfpm2, Col8a1, Syt1, Pclo, Cntnap5b, Cacna2d1, Dscaml1
## PC_ 2
## Positive: Pbx1, 2610035D17Rik, Fndc3b, Dnaja1, Cdh1, Hsp90aa1, Gpr158, Egfr, Ralgapa2, Snap25
## Tenm4, Malat1, Hsph1, Chka, Shroom3, Neat1, Hspa4l, Slc18a1, Rgs7, Anxa1
## Negative: mt-Cytb, mt-Nd1, mt-Nd2, Cplx2, mt-Nd6, Gm42418, Sftpc, Lyz2, Hist1h1e, Nkain3
## Sftpa1, Adam19, Ddc, Fli1, Slc1a3, Ldb2, Eng, Hc, Adgrl3, Ptprb
## PC_ 3
## Positive: Hsp90aa1, Hsp90ab1, Hsph1, Dnaja1, Hspa1a, Hspa1b, Hspa8, Hspd1, Bag3, Hspb1
## Megf9, Mfsd2a, Utp14b, Rc3h2, Scg2, Dnajb1, Ugcg, Fam107b, Hsp90b1, Hspe1
## Negative: Col8a1, Nrxn1, Malat1, Rad51b, Pde4d, 9030622O22Rik, Nlgn1, Atp8a1, Sfta3-ps, Mecom
## Ctnna2, Grid2, Agbl4, Pbx1, Lrmda, Kcnip1, Fos, Tcerg1l, Ptprn2, Col23a1
## PC_ 4
## Positive: Mki67, Top2a, Cenpe, Cenpf, Kif15, Knl1, Kif20b, Aspm, Hmgb2, Tpx2
## Hist1h1e, Hist1h1d, Anln, Smc4, Cdca2, Nusap1, Clspn, Incenp, Hist1h1a, Hist1h1c
## Negative: Gpr158, Ralgapa2, Dscaml1, Dlgap1, Egfr, Ptprn, Tenm4, 2610035D17Rik, Fndc3b, Celf4
## Rps6ka2, Tmtc1, Phldb2, Cntnap5b, Pbx1, March11, Prkca, Oxr1, Cdh13, Ank3
## PC_ 5
## Positive: Sftpc, Flt1, Fendrr, Afap1l1, Slc34a2, Adgrl3, Cd36, Ldb2, Prex2, Eng
## Hc, Sftpa1, Ptprb, Calcrl, Hmcn1, Lyz2, Sftpb, Col4a1, Pecam1, Bmp1
## Negative: Hsp90aa1, Hsp90ab1, Dnaja1, Hsph1, Hspa1a, Hspa1b, Dnajb1, Ubc, Hspd1, Hspa8
## Ptprj, Slc4a4, Itprid1, Ptpn13, Col15a1, Por, Samd4, Large1, Hspb1, B4galt5
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.SCT.pca; see ?make.names for more details on
## syntax validity
x_pca_small <- sclc_small@reductions$pca@cell.embeddings
x_pca_small <- t(x_pca_small)
x_pca_small <- x_pca_small[1:20,] #keep only top 18 PCs
loadings<- sclc_small@reductions$pca@feature.loadings.projected
loadings <- as.matrix(loadings)
##################################
# Fit 4 archetypes
arc <- fit_pch((x_pca_small), noc = 4)
## Warning: Python '/Users/smgroves/Library/r-miniconda/envs/reticulate_PCHA/
## bin/python' was requested but '/Users/smgroves/Documents/anaconda3/envs/
## reticulate_PCHA/bin/python3.7' was loaded instead (see reticulate::py_config()
## for more information)
# Fit 4 archetypes with bootstrapping for robustness
arc_rob = fit_pch_bootstrap(x_pca_small, n = 200, sample_prop = .8, seed = 2543, delta = 1,
noc = 4)
# arc_ave <- average_pch_fits(arc_rob)
#
#
#
# arc_ks = ParetoTI::k_fit_pch(x_pca_small, ks = 2:8, check_installed = T,
# bootstrap = T, bootstrap_N = 200, maxiter = 1000,
# bootstrap_type = "m", seed = 2543,
# volume_ratio = "t_ratio", # set to "none" if too slow
# delta=0, conv_crit = 1e-04, order_type = "align",
# sample_prop = 0.75)
# Show variance explained by a polytope with each k (cumulative)
# ParetoTI::plot_arc_var(arc_ks, type = "varexpl", point_size = 2, line_size = 1.5) + theme_bw()
p_pca = plot_arc(arc_data = arc, data = x_pca_small,
which_dimensions = 1:3, line_size = 1.5,
data_lab = as.character(Seurat::Idents(sclc_small)),
colors = palette(rainbow(20)),
text_size = 60, data_size = 4)
plotly::layout(p_pca, title = "Average Archetypes for Top 20 PCs")
#